package cn.galudisu.spark._2_loadveritydata

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SparkSession

/** Loads student CSV data into a Spark DataFrame via an RDD of `Student`.
  *
  * Historical note (translated from the original Chinese comment): case
  * classes used to be limited to 22 fields, because the Scala standard
  * library only provided up to Tuple22/Function22; Scala 2.11 lifted this
  * restriction. On older versions, exceeding the limit required a
  * hand-written companion class and object.
  *
  * @author galudisu
  */
object DataFrameFromProductClass {

  /** Entry point: builds the Spark session, loads the CSV, then prints the
    * resulting DataFrame's schema and rows.
    *
    * NOTE(review): an explicit `main` replaces the `App` trait — `App`'s
    * delayed initialization is a known pitfall for non-trivial programs.
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("DataWith33Atts").setMaster("local[2]")

    // Build the session exactly once; the original called
    // SparkSession.builder().config(conf).getOrCreate() twice just to pull
    // out sparkContext and sqlContext separately.
    val spark = SparkSession.builder().config(conf).getOrCreate()

    try {
      val rddOfStudents = convertCSVToStudents("csv/student-mat.csv", spark.sparkContext)

      // Needed in scope for the RDD -> DataFrame conversion (`toDF`).
      // `spark.implicits._` supersedes the legacy `sqlContext.implicits._`.
      import spark.implicits._

      // Create DataFrame
      val studentDFrame = rddOfStudents.toDF()

      studentDFrame.printSchema()
      studentDFrame.show()
    } finally {
      // Release Spark resources even if loading or printing fails.
      spark.stop()
    }
  }

  /** Reads the file at `filePath` and parses each line into `Student`
    * records via `Student(eachLine)`.
    *
    * NOTE(review): `Student.apply` is defined elsewhere in this project;
    * since it is used with `flatMap`, it presumably returns an
    * Option/collection, silently dropping lines that do not parse —
    * confirm against its definition.
    *
    * @param filePath path to the CSV file to load
    * @param sc       the SparkContext used to read the file
    * @return an RDD of parsed students
    */
  def convertCSVToStudents(filePath: String, sc: SparkContext): RDD[Student] =
    sc.textFile(filePath).flatMap(eachLine => Student(eachLine))
}
